import os, geopandas as gpd

# Load the US states shapefile from the "Geodataframes" folder.
states=gpd.read_file(os.path.join("Geodataframes","USA_States.shp"))
# Inspect the type of the loaded object.
type(states)
geopandas.geodataframe.GeoDataFrame
states.shape
(51, 4)
states.columns
Index(['STATE_NAME', 'STATE_FIPS', 'STATE_ABBR', 'geometry'], dtype='object')
# Preview the first rows of the states table.
states.head()
# Show the rows that have a missing value in at least one column.
states[states.isna().any(axis=1)]
# Summarise the columns, their non-null counts and dtypes.
states.info()
<class 'geopandas.geodataframe.GeoDataFrame'>
RangeIndex: 51 entries, 0 to 50
Data columns (total 4 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 STATE_NAME 51 non-null object
1 STATE_FIPS 51 non-null object
2 STATE_ABBR 51 non-null object
3 geometry 51 non-null geometry
dtypes: geometry(1), object(3)
memory usage: 1.7+ KB
states.plot()
<AxesSubplot:>
/opt/python/envs/default/lib/python3.8/site-packages/geopandas/plotting.py:51: ShapelyDeprecationWarning: The 'type' attribute is deprecated, and will be removed in the future. You can use the 'geom_type' attribute instead.
if geom is not None and geom.type.startswith(prefix) and not geom.is_empty:
# Load the remaining layers (capitals, rivers and lakes) from the
# "Geodataframes" folder.
capitals = gpd.read_file(
    os.path.join("Geodataframes", "USA_Capitals.shp")
)
rivers = gpd.read_file(
    os.path.join("Geodataframes", "USA_Hydrography.shp")
)
lakes = gpd.read_file(
    os.path.join("Geodataframes", "USA_Lakes.shp")
)
capitals.plot()
<AxesSubplot:>
rivers.plot()
<AxesSubplot:>
/opt/python/envs/default/lib/python3.8/site-packages/geopandas/plotting.py:51: ShapelyDeprecationWarning: The 'type' attribute is deprecated, and will be removed in the future. You can use the 'geom_type' attribute instead.
if geom is not None and geom.type.startswith(prefix) and not geom.is_empty:
lakes.plot()
<AxesSubplot:>
# Style the states layer: choose the fill colour, the outline colour and
# the outline width.
states.plot(
    facecolor="#EEF1F5",  # fill colour
    edgecolor="black",  # outline colour
    linewidth=0.2,  # outline width
)
<AxesSubplot:>
/opt/python/envs/default/lib/python3.8/site-packages/geopandas/plotting.py:51: ShapelyDeprecationWarning: The 'type' attribute is deprecated, and will be removed in the future. You can use the 'geom_type' attribute instead.
if geom is not None and geom.type.startswith(prefix) and not geom.is_empty:
# Style the capitals layer as small, semi-transparent red points.
capitals.plot(
    marker=".",  # marker shape
    color="red",
    markersize=4,
    alpha=0.3,  # transparency
)
<AxesSubplot:>
# Style the rivers layer with a thin line in a custom colour.
rivers.plot(
    edgecolor="#38B5C6",
    linewidth=0.5,
)
<AxesSubplot:>
/opt/python/envs/default/lib/python3.8/site-packages/geopandas/plotting.py:51: ShapelyDeprecationWarning: The 'type' attribute is deprecated, and will be removed in the future. You can use the 'geom_type' attribute instead.
if geom is not None and geom.type.startswith(prefix) and not geom.is_empty:
#check that every layer has the same CRS
lakes.crs
<Geographic 2D CRS: EPSG:4326>
Name: WGS 84
Axis Info [ellipsoidal]:
- Lat[north]: Geodetic latitude (degree)
- Lon[east]: Geodetic longitude (degree)
Area of Use:
- name: World.
- bounds: (-180.0, -90.0, 180.0, 90.0)
Datum: World Geodetic System 1984 ensemble
- Ellipsoid: WGS 84
- Prime Meridian: Greenwich
capitals.crs
<Geographic 2D CRS: EPSG:4326>
Name: WGS 84
Axis Info [ellipsoidal]:
- Lat[north]: Geodetic latitude (degree)
- Lon[east]: Geodetic longitude (degree)
Area of Use:
- name: World.
- bounds: (-180.0, -90.0, 180.0, 90.0)
Datum: World Geodetic System 1984 ensemble
- Ellipsoid: WGS 84
- Prime Meridian: Greenwich
states.crs
<Geographic 2D CRS: EPSG:4326>
Name: WGS 84
Axis Info [ellipsoidal]:
- Lat[north]: Geodetic latitude (degree)
- Lon[east]: Geodetic longitude (degree)
Area of Use:
- name: World.
- bounds: (-180.0, -90.0, 180.0, 90.0)
Datum: World Geodetic System 1984 ensemble
- Ellipsoid: WGS 84
- Prime Meridian: Greenwich
rivers.crs #every layer reports the same CRS
#So there is no need to call to_crs()
<Geographic 2D CRS: EPSG:4326>
Name: WGS 84
Axis Info [ellipsoidal]:
- Lat[north]: Geodetic latitude (degree)
- Lon[east]: Geodetic longitude (degree)
Area of Use:
- name: World.
- bounds: (-180.0, -90.0, 180.0, 90.0)
Datum: World Geodetic System 1984 ensemble
- Ellipsoid: WGS 84
- Prime Meridian: Greenwich
# Build the full map from all layers and style each one.
# The states layer creates the axes; every other layer is drawn onto it
# through ax=base.
base = states.plot(
    facecolor="#ECF0F1",
    edgecolor="#2A2A2A",
    linewidth=0.1,
    figsize=(12, 12),
)
rivers.plot(edgecolor="#2399B4", linewidth=0.4, ax=base)
lakes.plot(edgecolor="#2399B4", facecolor="#A9E8F7", linewidth=0.3, ax=base)
capitals.plot(marker=".", color="red", markersize=2, alpha=0.7, ax=base)
<AxesSubplot:>
/opt/python/envs/default/lib/python3.8/site-packages/geopandas/plotting.py:51: ShapelyDeprecationWarning: The 'type' attribute is deprecated, and will be removed in the future. You can use the 'geom_type' attribute instead.
if geom is not None and geom.type.startswith(prefix) and not geom.is_empty:
/opt/python/envs/default/lib/python3.8/site-packages/geopandas/plotting.py:51: ShapelyDeprecationWarning: The 'type' attribute is deprecated, and will be removed in the future. You can use the 'geom_type' attribute instead.
if geom is not None and geom.type.startswith(prefix) and not geom.is_empty:
# Build an interactive map whose layers can be explored.
import folium

# The first explore() call creates the map; each later call adds its layer
# to that same map through m=m.
m = states.explore(color="#DDCDFC", name="states")
m = rivers.explore(m=m, color="#2399B4", name="rivers")
m = lakes.explore(m=m, color="#1075F5", name="lakes")
m = capitals.explore(m=m, color="red", name="capitals")

# Add a layer control so the user can switch between layers and pick the
# ones to display.
folium.LayerControl().add_to(m)

m
# Pick the state of Montana; it is the mask that delimits the map.
montana = states.loc[states["STATE_NAME"] == "Montana"]

# Clip every layer to the Montana mask.
states_clipped = gpd.clip(states, montana)
rivers_clipped = gpd.clip(rivers, montana)
lakes_clipped = gpd.clip(lakes, montana)
capitals_clipped = gpd.clip(capitals, montana)
# With the layers clipped, draw and style the Montana map.
# The Montana polygon creates the axes; the clipped layers are drawn on top.
base = montana.plot(
    facecolor="#FBB216",
    edgecolor="#5C45A0",
    linewidth=0.4,
    figsize=(6, 6),
)
capitals_clipped.plot(marker="+", color="red", markersize=18, ax=base)
rivers_clipped.plot(edgecolor="blue", linewidth=0.5, ax=base)
lakes_clipped.plot(edgecolor="blue", facecolor="#1075F5", linewidth=0.5, ax=base)
<AxesSubplot:>
/opt/python/envs/default/lib/python3.8/site-packages/geopandas/plotting.py:51: ShapelyDeprecationWarning: The 'type' attribute is deprecated, and will be removed in the future. You can use the 'geom_type' attribute instead.
if geom is not None and geom.type.startswith(prefix) and not geom.is_empty:
# Coordinates of Montana, passed below as the initial map location.
montanaCoord = [46.59271, -112.03611]

# Zoom in on Montana in the interactive map.
m = states.explore(
    location=montanaCoord,
    zoom_start=5.5,
    tiles="CartoDB positron",
    color="#DDCDFC",
    name="states",
)
m = rivers.explore(m=m, color="#2399B4", name="rivers")
m = lakes.explore(m=m, color="#1075F5", name="lakes")
m = capitals.explore(m=m, color="red", name="capitals")
folium.LayerControl().add_to(m)
m
# Save the static (non-interactive) Montana map.
import matplotlib.pyplot as plt

base = montana.plot(
    facecolor="#FBB216",
    edgecolor="#5C45A0",
    linewidth=0.4,
    figsize=(5, 5),
)
map2 = rivers_clipped.plot(edgecolor="blue", linewidth=0.5, ax=base)
map3 = lakes_clipped.plot(edgecolor="blue", facecolor="#1075F5", linewidth=0.5, ax=base)
mapEnd = capitals_clipped.plot(marker="+", color="red", markersize=15, ax=base)
# The figure is written to the "Maps" folder in Datalore.
plt.savefig(os.path.join("Maps", "mapEnd.jpg"))
/opt/python/envs/default/lib/python3.8/site-packages/geopandas/plotting.py:51: ShapelyDeprecationWarning: The 'type' attribute is deprecated, and will be removed in the future. You can use the 'geom_type' attribute instead.
if geom is not None and geom.type.startswith(prefix) and not geom.is_empty:
# Finally, export the layers to a different file format: all four go into
# the same GeoPackage file, each under its own layer name.
states.to_file(os.path.join("Maps", "worldMaps.gpkg"), layer="states", driver="GPKG")
rivers.to_file(os.path.join("Maps", "worldMaps.gpkg"), layer="rivers", driver="GPKG")
lakes.to_file(os.path.join("Maps", "worldMaps.gpkg"), layer="lakes", driver="GPKG")
capitals.to_file(os.path.join("Maps", "worldMaps.gpkg"), layer="capitals", driver="GPKG")
# Store the GitHub link to the GeoPackage.
worldMaps = (
    "https://github.com/ThayraSosa/introgdf/raw/main/Maps/worldMaps.gpkg"
)
from fiona import listlayers

# List the layers available in the GeoPackage at that link.
listlayers(worldMaps)
['states', 'rivers', 'lakes', 'capitals']
# Reload each layer from the GeoPackage link.
rivers = gpd.read_file(worldMaps, layer="rivers")
lakes = gpd.read_file(worldMaps, layer="lakes")
capitals = gpd.read_file(worldMaps, layer="capitals")
states = gpd.read_file(worldMaps, layer="states")

# Plot the reloaded layers together; the states layer creates the axes.
base = states.plot(facecolor="gainsboro")
capitals.plot(ax=base, markersize=0.5, color="red")
lakes.plot(ax=base, linewidth=0.5)
rivers.plot(ax=base, linewidth=0.5)
<AxesSubplot:>
/opt/python/envs/default/lib/python3.8/site-packages/geopandas/plotting.py:51: ShapelyDeprecationWarning: The 'type' attribute is deprecated, and will be removed in the future. You can use the 'geom_type' attribute instead.
if geom is not None and geom.type.startswith(prefix) and not geom.is_empty:
/opt/python/envs/default/lib/python3.8/site-packages/geopandas/plotting.py:51: ShapelyDeprecationWarning: The 'type' attribute is deprecated, and will be removed in the future. You can use the 'geom_type' attribute instead.
if geom is not None and geom.type.startswith(prefix) and not geom.is_empty:

Working with data

Video tutorial

Plug in multiple data sources

In Datalore you can work with various data sources together in one notebook.

You can connect your SQL databases (such as MySQL, Snowflake, PostgreSQL, Redshift, etc.), bucket storages (AWS S3, GCS buckets), and files (any file types) from the interface and further query and join them in one notebook.

To manage all your data, please use the Attached data tab in the left-hand sidebar.

Reusing data connections

When creating a bucket or a database connection, you’ll be able to reuse it across other notebooks in the same workspace.

You can manage all of the attached data sources from the workspace file system and add new connections directly from the notebook interface.

When you share a notebook or a workspace, your credentials are not exposed to the environment.

Persistent file storage

Datalore comes with persistent storage for each notebook. This means you can upload your data files directly to Notebook files. If you want to share a file across multiple notebooks, attach Workspace files to the notebook and upload it there. Workspace files are mounted under the /data/workspace_files/ directory.

Benefits of attaching files to a notebook

  • Your workspace file system won't be cluttered with too many files.
  • Notebook files are shared automatically when you invite collaborators to the notebook.

Additionally, you can download data to Notebook or Workspace files, or store it in memory, using various Python packages and APIs.

Dataframe exploration

Whenever a pandas dataframe is the result of your cell execution, you get additional tabs in the cell output:

  • Table – a scrollable table view of your data
  • Raw – this tab represents the raw output without the ability to scroll the data
  • Visualize – this tab brings out-of-the box plots to help you visually explore the data
  • Statistics – this tab provides essential descriptive statistics for your dataframe

Task: Run the code cell above and navigate to the Visualize and Statistics tabs!

# `import urllib` alone does not guarantee that the `request` submodule is
# loaded, so import it explicitly.
import urllib.request
import pandas as pd

# Download the sample CSV to a local file named "gpus.csv".
# The URL and the target filename must be two separate arguments: without
# the comma, Python joins the adjacent string literals into one wrong URL.
urllib.request.urlretrieve(
    'https://datalore-samples.s3.eu-west-1.amazonaws.com/datalore_gallery_of_samples/Getting+started/gpus.csv',
    'gpus.csv',
)

data = pd.read_csv("gpus.csv")
data

Viewing and editing attached files

Double-clicking on a .csv or text file opens it for editing in the right sidebar editor and lets you view the file and edit its contents.

If you open a .py script, you will also get smart coding assistance features for editing its contents.

Managing the environment

Video tutorial

Preconfigured environment

Each notebook in Datalore has an isolated environment. This means that when you apply changes to one notebook, they won't affect any of the other notebook environments.

Datalore comes with a lot of Python libraries pre-installed.

We've already installed pandas, NumPy, scikit-learn, Matplotlib, and Seaborn, so you can start importing the packages you need right away.

Datalore supports both the pip and Conda package managers. Pip is chosen by default, but you can always switch to Conda.

Installing additional libraries

  • To install additional libraries, upgrade package versions, and remove libraries, go to Environment | Explore tab. The changes will be written into the .yaml file, which will be stored in your Notebook files.
  • To install a package from a Git repository, go to Environment | Repositories.
  • To install any other dependencies (e.g., non-Python dependencies), you can modify the init.sh file. It will run before the environment is installed.

💡 Packages installed via the Environment tab are persisted when you reopen notebooks. You can also install packages using pip magic commands or Terminal, but they won't be persisted.

You can learn more about this in the Environment manager documentation.

Task: Install and import the datasets library

  1. Run the code cell below using Shift+Enter.
  2. Click on the prompt in the error log to search for the datasets library. This prompt will open the Explore tab of the Environment manager.
  3. Click on the datasets library.
  4. Click the Install button.
  5. Restart the kernel.
  6. Rerun the cell.
# Tutorial cell: this import raises ModuleNotFoundError until the
# `datasets` library has been installed (see the task steps above).
import datasets

# Show the installed version of the library.
datasets.__version__
ModuleNotFoundError: No module named 'datasets'

Collaborating with your team

Video tutorial

Sharing a notebook

In Datalore, you can edit notebooks together with your team in real time.

Click on the Share button in the top-right corner and choose your preferred sharing method:

  • Share by sending a link (the simpler option).
  • Invite collaborators by email (for more granular permissions).
  • Share with groups of collaborators. Please contact your Datalore admins to find out whether you have groups integrated with Datalore.

💡 To access notebooks as collaborators, invited users will need to create a Datalore account.

When sharing a notebook you can provide either edit or view access.

  • Edit access will allow collaborators to edit code and attached files and run computations.

    ⚠️ Note that collaborators will consume the notebook owner's resources.

  • View access will only allow users to see the real-time representation of the notebook.

To track the changes, Datalore has a built-in version control system where you can create history checkpoints and revert the notebook to past versions. Check this out via Tools | History.

You can read more about notebook sharing here.

Task: Invite your colleagues to Datalore!

Try out the real-time collaboration feature with your colleagues. Working together can be a lot of fun. 🚀

To track your collaborator's actions through the notebook and attached files, click on their avatar in the upper right-hand corner and start following along!

Sharing a workspace

In Datalore, you can create and share workspaces.

Workspaces help you organize your work and allow you to easily share multiple notebooks, data connections, files, and reports with your team.

⚠️ Note that the workspace owner's resources will be consumed for all the computations made in the workspace.

Reporting

Video tutorial

Report builder

To share your research results with stakeholders, you can use the Report builder feature via the Tools menu section or by clicking the Build report button in the upper right-hand corner.

You will be able to:

  • Arrange the cells on a canvas to make the report look more dashboard-style.
  • Hide specific cell inputs and outputs.
  • Publish a static or interactive report.

Task: Create a report out of this notebook!

Sharing reports

After you publish a report, it will become available under a link. You can then share it with colleagues even if they don't have a Datalore account – the report will be available for them inside the browser. Each report viewer will get a separate copy of the report and will be able to interact with the controls and rerun the report independently.

You can also access all the workspace reports from the Published reports section in your Workspace file system.

Export as PDF, PY, IPYNB

You can export notebooks in multiple formats, including PDF.

Go to the File menu tab and select the export option you need.

Automations

Video tutorial

Scheduled runs

In Datalore, you can schedule your notebook to run on a regular basis. Go to the Computation tab and create a schedule in the Scheduled runs widget. You'll be able to configure the running interval by using the dropdowns or by specifying a cron string.

Scheduled report updates

When configuring notebook schedules, you can choose to automatically update published reports, delivering regular updates to your stakeholders.

Managing the schedules

You can view and edit all the schedules of the workspace from the file system. You'll be able to view your run results and change the scheduling settings.

Switching between CPU and GPU

When running a notebook, you can choose between available machines according to your needs. The Computation tab will also show you the CPU and RAM load statistics. The computation status bar is located in the bottom right-hand corner of the editor.

Background computation

Switching on Background computation from the Computation tab allows you to keep notebooks running even if you close the browser tab.

Learn more about Background computation here.

⚠️ Be careful when switching on Background computation, as it will consume your computation quota.

Shortcuts 101

Datalore supports a wide variety of Jupyter and PyCharm shortcuts. You can access the full list from Help | Shortcuts or by pressing Shift+F1.

Command mode and Editor mode

There are two modes for shortcuts: Command mode and Editor mode.

When editing a cell's content you are in Editor mode. To access Command mode, which allows you to manipulate the cells themselves, press Esc. To switch back to Editor mode, press Enter.

Some of the most often used shortcuts include:

  • Run the selected cell and select below: Shift+Enter.
  • Change cell type: Command+M/Ctrl+M.
  • Undo action: Command+Z/Ctrl+Z.
  • Delete cell: DD (Command mode).
  • Insert cell above: A (Command mode).
  • Insert cell below: B (Command mode).
  • Copy selected cells: C (Command mode).
  • Cut selected cells: X (Command mode).
  • Paste below: V (Command mode).

Command palette

Access quick actions using the Command palette from the Help menu tab.

How to get support

What really helps us make the Datalore product better is your feedback. Our team is always thankful when you tell us about your experience or report bugs.

Feel free to share your feedback with us and report any issues by:

Created using Figma